I previously processed the raw sequencing data, optimized the barcode clustering, quantified the pDNA data and normalized the cDNA data. In this script, I want to have a detailed look at the cDNA data from a general perspective.
How to make a good rendering table:
| column1 | column2 | column3 |
|---|---|---|
| 1 | 2 | 3 |
| a | b | c |
knitr::opts_chunk$set(echo = TRUE)
# Record when the script started (presumably used later to report the run time).
StartTime <- Sys.time()
# 8-digit date tag (YYYYMMDD), formatted directly instead of stripping
# characters from the printed timestamp:
Date <- format(StartTime, "%Y%m%d")
# libraries:
# NOTE(review): plyr is attached after dplyr below, so plyr's versions of the
# verbs both packages export (e.g. mutate(), summarise(), arrange(), rename(),
# count()) mask dplyr's for the rest of the script. If the dplyr versions are
# intended, attach plyr before dplyr or call them as dplyr::<verb>() -- confirm.
library(RColorBrewer)
library(ggplot2)
library(dplyr)
library(maditr)
library(tibble)
library(pheatmap)
library(ggpubr)
library(ggbeeswarm)
library(ggforce)
library(viridis)
library(plyr)
library(cowplot)
library(gridExtra)

Functions used throughout this script.
## `geom_smooth()` using formula 'y ~ x'
## Only ~10 of the 26 TFs in the library show promising activity at first glance
## Prepare dataframe
# Collect the tf_activity value per TF and condition for the native-enhancer
# reporters. The values are only de-duplicated here, not re-averaged.
# which() drops rows whose native_enhancer flag is NA; plain logical indexing
# would turn each of them into an all-NA row.
native_rows <- which(cDNA_df$native_enhancer == "Yes")
tf_activity_heatmap <- cDNA_df[native_rows, ] %>%
  select(TF, condition, tf_activity) %>%
  unique()
# Reshape to wide format: one row per condition, one column per TF.
# NOTE(review): if a TF/condition pair still has more than one distinct
# tf_activity value, dcast() falls back to its default aggregation instead of
# keeping the values -- confirm the pairs are unique.
tf_activity_heatmap <- dcast(tf_activity_heatmap, condition ~ TF,
                             value.var = "tf_activity")
# Move the condition labels into the row names so only the TF columns remain.
tf_activity_heatmap <- tf_activity_heatmap %>%
  remove_rownames() %>%
  column_to_rownames(var = "condition")

# Activity summed over all conditions, one value per heatmap column.
# pos is a factor whose levels follow the column order, so the bars are drawn
# in the same left-to-right order as the columns of tf_activity_heatmap
# rather than being re-sorted by ggplot.
heatmap_columns <- colnames(tf_activity_heatmap)
comb_activity <- data.frame(
  "pos" = factor(heatmap_columns, levels = heatmap_columns),
  "activity" = colSums(tf_activity_heatmap)
)
# Bare bar chart: all axis text, ticks, titles and the legend are switched off.
ggplot(comb_activity, aes(x = pos, y = activity)) +
  geom_bar(stat = "identity", aes(fill = activity)) +
  theme_classic() +
  theme(axis.title.y = element_blank(), axis.text.y = element_blank(),
        axis.ticks.y = element_blank(), axis.ticks.x = element_blank(),
        axis.text.x = element_blank(),
        axis.title.x = element_blank(),
        legend.position = "none") +
  scale_fill_distiller(name = "activity", palette = "Greys", direction = 1)
# Keeping the scale in the pheatmap function
# Fixed colour scale: 101 breaks from 0 to 4 delimit the 100 colours passed to
# pheatmap() below.
myBreaks1 <- seq(0, 4, 0.04)
# pheatmap function
# Reorder the rows for display.
# NOTE(review): the order is hard-coded by row position and assumes exactly
# 7 rows in their current order -- confirm whenever the set of conditions
# changes.
tf_activity_heatmap <- tf_activity_heatmap[c(6, 2, 1, 3, 4, 7, 5), ]
# Rows and columns are drawn in the given order (no clustering).
pheatmap(as.matrix(tf_activity_heatmap),
         color = colorRampPalette(brewer.pal(n = 7, name = "Greys"))(100),
         breaks = myBreaks1, border_color = "black",
         cellwidth = 10, cellheight = 10,
         cluster_rows = FALSE, cluster_cols = FALSE)
# Based on heatmap: are there Tfcp2l1 or Tcf enrichments in e11_klf2_7 or Stat3 enrichments in e11_klf2_5 or Tcf enrichments in e97_klf2_5/6
# Based on https://molotool.autosome.ru/:
## 1xStat5 motif in e11_klf2_5
## no candidate motif in e11_klf2_7
## 1xTcf7l2 in e97_klf2_5 (but also in e97_klf2_4 -> why is this one not expressed)
## I should do a more systematic motif enrichment analysis
## Generate FIMO matrix
# Promoter sequences that are stripped from the reporter sequences below.
# Their order matches the names assigned further down
# (minP, mCMV, hBGm, Random).
promoters <- c("TAGAGGGTATATAATGGAAGCTCGACTTCCAG",
               "GGCGTTTACTATGGGAGGTCTATATAAGCAGAGCTCGTTTAGTGAACCGTCAGATC",
               "GGGCTGGGCATAAAAGTCAGGGCAGAGCCATCTATTGCTTACATTTGCTTCT",
               "GGTTAGCGATCCAATTCAGCTAGATTTTAAGC")
# Unique (TF, seq) pairs of the native-enhancer reporters.
# which() drops rows whose native_enhancer flag is NA; plain logical indexing
# would turn each of them into an all-NA row.
native_rows <- which(cDNA_df$native_enhancer == "Yes")
cDNA_df_native <- cDNA_df[native_rows, ] %>%
  select(TF, seq) %>%
  # cut the sequence at CACGACGCTCTTCCGATCT (that stretch and everything after)
  mutate(seq = gsub("CACGACGCTCTTCCGATCT.*", "", seq)) %>%
  # delete every occurrence of any of the promoter sequences
  mutate(seq = gsub(paste(promoters, collapse = "|"), "", seq)) %>%
  unique()
# From here on `promoters` is a data frame of named promoter sequences.
# stringsAsFactors is set explicitly so the columns stay character regardless
# of the R version's default.
promoters <- data.frame(TF = c("minP", "mCMV", "hBGm", "Random"),
                        seq = promoters,
                        stringsAsFactors = FALSE)
# Append the promoters as additional entries and rename the columns to
# seq.name / seq.text.
cDNA_df_native <- rbind(cDNA_df_native, promoters) %>%
  setnames(c("TF", "seq"), c("seq.name", "seq.text"))
#dat2fasta(cDNA_df_native, outfile = "/DATA/usr/m.trauernicht/projects/tf_activity_reporter/data/SuRE_TF_1/results/native-enhancer/cDNA_df_native.fasta")

# motfn=/home/f.comoglio/mydata/Annotations/TFDB/Curated_Natoli/update_2017/20170320_pwms_selected.meme
# odir=/home/m.trauernicht/mydata/projects/tf_activity_reporter/data/SuRE_TF_1/results/native-enhancer/fimo
# query=/home/m.trauernicht/mydata/projects/tf_activity_reporter/data/SuRE_TF_1/results/native-enhancer/cDNA_df_native.fasta
# nice -n 19 fimo --no-qvalue --thresh 1e-4 --verbosity 1 --o $odir $motfn $query

We built a TF motif matrix using -log10 transformed FIMO scores. We used this feature encoding throughout the rest of this analysis, unless otherwise stated.
### All of these heatmaps indicate that we have informative reporters for ~10 TFs, and that the TF reporter design matters for some but not all TFs
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## [1] "Run time: 2.555596 mins"
## [1] "/DATA/usr/m.trauernicht/projects/SuRE-TF"
## [1] "Thu Sep 24 14:47:08 2020"
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 16.04.7 LTS
##
## Matrix products: default
## BLAS: /usr/lib/libblas/libblas.so.3.6.0
## LAPACK: /usr/lib/lapack/liblapack.so.3.6.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] gridExtra_2.3 cowplot_1.0.0 plyr_1.8.6 viridis_0.5.1
## [5] viridisLite_0.3.0 ggforce_0.3.1 ggbeeswarm_0.6.0 ggpubr_0.2.5
## [9] magrittr_1.5 pheatmap_1.0.12 tibble_3.0.1 maditr_0.6.3
## [13] dplyr_0.8.5 ggplot2_3.3.0 RColorBrewer_1.1-2
##
## loaded via a namespace (and not attached):
## [1] beeswarm_0.2.3 tidyselect_0.2.5 xfun_0.12 purrr_0.3.3
## [5] splines_3.6.3 lattice_0.20-38 colorspace_1.4-1 vctrs_0.2.4
## [9] htmltools_0.4.0 yaml_2.2.0 mgcv_1.8-31 rlang_0.4.5
## [13] pillar_1.4.3 glue_1.3.1 withr_2.1.2 tweenr_1.0.1
## [17] lifecycle_0.2.0 stringr_1.4.0 munsell_0.5.0 ggsignif_0.6.0
## [21] gtable_0.3.0 evaluate_0.14 labeling_0.3 knitr_1.28
## [25] vipor_0.4.5 Rcpp_1.0.3 scales_1.1.0 farver_2.0.1
## [29] digest_0.6.23 stringi_1.4.6 polyclip_1.10-0 grid_3.6.3
## [33] tools_3.6.3 crayon_1.3.4 pkgconfig_2.0.3 Matrix_1.2-18
## [37] ellipsis_0.3.0 MASS_7.3-51.5 data.table_1.12.8 assertthat_0.2.1
## [41] rmarkdown_2.0 R6_2.4.1 nlme_3.1-143 compiler_3.6.3